
/******************************************************************************
* load data from spark and construct delta across geographic regions
*****************************************************************************/


*****************************************************************************
**  load collapsed mover - non-mover from spark
*****************************************************************************
* report the file-extension suffix this run appends to the saved outputs
display `"file extension: ${ext}"'

* read the collapsed mover - non-mover extract: the first row supplies the
* variable names and columns 1 through 99 are forced to numeric
import delimited "${dataRoot}/mv_nmv.csv", ///
	delimiters(",") varnames(1) numericcols(1/99) clear

* overview of the loaded variables and detailed distribution of cell sizes
describe
summarize numobs, detail

* balance check: counts by num_mv, first unweighted and then weighted by numobs
tabulate num_mv
tabulate num_mv [w=numobs]


* convert as of date to yyyymm by dropping its last two digits
* (assumes the raw value is yyyymmdd - confirm against the extract)
replace asofdate = floor(asofdate/100)
tab asofdate

* as of year and as of month, split out of the yyyymm value
qui gen  asofy = floor(asofdate/100)
qui gen  asofm = floor(asofdate - asofy*100)

* as of date in year-quarter format
* ceil(month/3) maps months 1-3, 4-6, 7-9, 10-12 to quarters 1-4; dividing the
* month by 3 directly is an integer only for months 3, 6, 9 and 12, and yq()
* is documented for integer quarters 1 to 4 only
qui gen date_q = yq(asofy, ceil(asofm/3))

* bin age and age at move into groups of 10, bounded below by the youngCut
* global and above by the oldCut global
* NOTE(review): max() ignores missing arguments, so a missing age ends up in
* the youngCut bin rather than staying missing - confirm this is intended
foreach v in age age_mv {
	rename `v' _`v'
	qui gen `v' = min(max(10*floor(_`v'/10), ${youngCut}), ${oldCut})
	drop _`v'
}

* adjust variables (per the original note: scale monetary variables and adjust
* for inflation); cleanUpVars is defined outside this section
cleanUpVars 0
drop year


* ids per level of variation: one group per combination of num_mv, the
* variables listed in the varL global, and move_t
* (per the original note: cz origin, cz destination, move quarter, age bin)

* quarter ids: position of each as of date inside its group
bysort num_mv ${varL} move_t (asofdate): gen int id_q = _n

* variation ids: running count of group starts, so every row of a group
* carries the same consecutive integer
gen long varID = sum(id_q == 1)
summarize varID


* current geography: origin by default, destination for rows with num_mv==1
* whose move_t is at or before the as of date
local atDest "num_mv==1 & move_t<=asofdate"

gen      ${Geo} = ${Geo}_o
replace  ${Geo} = ${Geo}_d if `atDest'

* when the working geography is not cz, build the current cz the same way
if "${Geo}" != "cz" {
	gen      cz = cz_o
	replace  cz = cz_d if `atDest'
}

*relative time of move
* quarter of move: date_q on the row where move_t equals the as of date,
* spread to every row of the same varID
bysort varID: egen int qofmove = min(cond(move_t==asofdate, date_q, .))

* quarters elapsed since the move (negative before it)
gen  rel_q = date_q - qofmove
drop qofmove


*Geo pair: origin code in the high digits, destination code in the low five
* stored as double because the default float type represents integers exactly
* only up to 16,777,216; with the 100000 multiplier any origin code of 168 or
* more exceeds that, and distinct pairs would round to the same value
gen double od = ${Geo}_o*100000 + ${Geo}_d

*shift the relative quarter so that its smallest value becomes 0
* refq is kept as a global: the negated minimum of rel_q, i.e. the value of
* rel_q_prime that corresponds to rel_q == 0
summarize rel_q
global refq = -`r(min)'
generate rel_q_prime = rel_q + ${refq}


*first and last relative quarter observed for each varID
* these bounds support restricting to units with enough data pre/post move;
* no observations are dropped in this section
foreach stat in min max {
	bysort varID: egen int `stat'_rel_q = `stat'(rel_q)
}



compress
display "finish organizing mover variables"




*********************************************************************
//construct delta
*********************************************************************
*calculate and plot delta of outcome variable both using all non-movers and drawn non-movers

*rank: running row number inside each current-geography x deltaGeo x as of date
*cell (per the original note, used to help calculate simple averages)
bysort ${Geo} ${deltaGeo} asofdate: gen rank = _n



*compute delta for each variable listed in the keyMvVars global
* creates, per variable:
*   avg_VAR         numobs-weighted non-mover mean by current geography and deltaGeo
*   delta_VAR       change in avg_VAR at the move row of movers
*   delta_VAR_cust  delta_VAR spread to every row of the same varID
foreach var of varlist $keyMvVars{
	di "running `var'"

	*use non-mover to compute delta
	qui gen `var'_nomove = `var'  if num_mv==0

	*average outcome among non-movers
	* NOTE(review): wtmean is not an official egen function; presumably the
	* community-contributed _gwtmean is installed - confirm
	bysort ${Geo} ${deltaGeo}: egen avg_`var' = wtmean(`var'_nomove), weight(numobs)

	*drop temp variables
	qui drop `var'_nomove

	*delta: destination - origin at time of move
	* the lag is taken under by varID so that it never reads the last row of a
	* different varID; when the move row is the first row of its group the
	* delta is missing instead of mixing two groups
	qui bysort varID (asofdate): gen delta_`var' = avg_`var'-avg_`var'[_n-1] ///
		if move_t==asofdate & num_mv==1

	*summary of delta
	sum delta_`var' [aw=numobs]

	*fill delta in each quarter
	qui bysort varID: egen delta_`var'_cust = mean(delta_`var')

}



*********************************************************************
//save data
*********************************************************************


*write the full mover-delta dataset
compress
save "${dataRoot}/tu_mover_delta${ext}.dta", replace


*write a companion file holding only the first observation
drop if _n > 1
save "${dataRoot}/tu_mover_delta${ext}_cut.dta", replace



 
*time stamp
display "DONE!! $S_TIME  $S_DATE"
